data_student_teacher_expenditure<-data_student_teacher_exp_raw

#functions for percentiles
perc.rank <- function(x) {
  y<-rank(x)/length(x)
  return(y)}

#compute student component percentiles
data_student_teacher_expenditure$mandatory.student<-NA
data_student_teacher_expenditure[!is.na(data_student_teacher_expenditure$disciplina_obligatorie_scris_final),]<-data_student_teacher_expenditure[!is.na(data_student_teacher_expenditure$disciplina_obligatorie_scris_final),] %>%
  group_by(an,mandatory_subject_student) %>%
  mutate(mandatory.student=perc.rank(disciplina_obligatorie_scris_final))

data_student_teacher_expenditure$elective.student<-NA
data_student_teacher_expenditure[!is.na(data_student_teacher_expenditure$disciplina_alegere_aria_culiculara_final),]<-data_student_teacher_expenditure[!is.na(data_student_teacher_expenditure$disciplina_alegere_aria_culiculara_final),] %>%
  group_by(an,elective_subject_student) %>%
  mutate(elective.student=perc.rank(disciplina_alegere_aria_culiculara_final))

data_student_teacher_expenditure$romanian.student<-NA
data_student_teacher_expenditure[!is.na(data_student_teacher_expenditure$lb_romana_final),]<-data_student_teacher_expenditure[!is.na(data_student_teacher_expenditure$lb_romana_final),] %>%
  group_by(an) %>%
  mutate(romanian.student=perc.rank(lb_romana_final))



#split data by subject
data_student_teacher_expenditure_long<-data_student_teacher_expenditure %>%
  ungroup() %>%
  mutate(id=seq.int(nrow(data_student_teacher_expenditure))) %>%
  rename(romanian.teacher=teacher_perc.ro,
         elective.teacher=teacher_perc.elect,
         mandatory.teacher=teacher_perc.mand,
         romanian.gpa=gpa_perc.ro,
         elective.gpa=gpa_perc.elect,
         mandatory.gpa=gpa_perc.mand,
         romanian.Experience=Experience.ro,
         elective.Experience=Experience.elect,
         mandatory.Experience=Experience.mand,
         romanian.Education=Education_num.ro,
         elective.Education=Education_num.elect,
         mandatory.Education=Education_num.mand,
         romanian.Teacher_Category_num=Teacher_Category_num.ro,
         elective.Teacher_Category_num=Teacher_Category_num.elect,
         mandatory.Teacher_Category_num=Teacher_Category_num.mand) %>%
  select(id_bac,id_adm,n_hs_town_group,grad_perc,entrance_perc,
         elective_subject_student,mandatory_subject_student,
         school_harmonized,unitate_de_invatamant,liceu_repartizat,scoala_de_provenienta,
         an,judet_bac,town,town_hs_bac,id,dec,Expenditure,quart_town,dec_town,n_hs_town_group,
         class_mean,school_mean,class_mean_yr,school_mean_yr,
         n_students_hs_yr,specializare_bac,specializare_bac2,specializare_adm,specializare_lb,
         entrance_perc_math,entrance_perc_ro,entrance_perc_exam,entrance_perc_gpa,
         media,media_la_admitere,n_students_town_yr,
         Cod_SIIIR_hs_bac,Cod_SIRUTA_hs_bac,
         "romanian.student","romanian.teacher",romanian.gpa,romanian.Experience,romanian.Teacher_Category_num,
         #"romanian.class_mean","romanian.class_mean_yr","romanian.school_mean","romanian.school_mean_yr",
         "elective.student","elective.teacher",elective.gpa,elective.Experience,elective.Teacher_Category_num,
         #"elective.class_mean","elective.class_mean_yr","elective.school_mean","elective.school_mean_yr",
         "mandatory.student","mandatory.teacher",mandatory.gpa,mandatory.Experience,mandatory.Teacher_Category_num,
         romanian.Education,elective.Education,mandatory.Education,
         school_change, Unemployment_hs_bac,Wages_hs_bac,drop_hs_hs_bac
         #"mandatory.class_mean","mandatory.class_mean_yr","mandatory.school_mean","mandatory.school_mean_yr",
  )
data_student_teacher_expenditure_long<-as.data.frame(data_student_teacher_expenditure_long)
test<-data_student_teacher_expenditure_long[sample(nrow(data_student_teacher_expenditure_long), 100),] %>% filter(!is.na(grad_perc))


data_student_teacher_expenditure_long<-reshape(data_student_teacher_expenditure_long,
                                               direction='long',
                                               varying=list(student=c("romanian.student","elective.student","mandatory.student"),
                                                            teacher=c("romanian.teacher","elective.teacher","mandatory.teacher"),
                                                            gpa=c("romanian.gpa","elective.gpa","mandatory.gpa"),
                                                            Experience=c("romanian.Experience","elective.Experience","mandatory.Experience"),
                                                            Teacher_Category_num=c("romanian.Teacher_Category_num","elective.Teacher_Category_num","mandatory.Teacher_Category_num"),
                                                            Education_num=c("romanian.Education","elective.Education","mandatory.Education")),
                                               timevar='subject',
                                               times=c('romanian', 'elective','mandatory'),
                                               v.names=c('student', 'teacher_test','teacher_gpa','teacher_exp','teacher_rank','teacher_education'),
                                               #,"class_mean","school_mean","class_mean_yr","school_mean_yr"),
                                               idvar="id")


# add elective/mandatory subjects
data_student_teacher_expenditure_long<-data_student_teacher_expenditure_long %>%
  mutate(subject2=ifelse(subject=='romanian','romanian','Other')) %>%
  mutate(subject2=ifelse(subject=='elective',elective_subject_student,subject2)) %>%
  mutate(subject2=ifelse(subject=='mandatory',mandatory_subject_student,subject2))
data_student_teacher_expenditure_long<-data_student_teacher_expenditure_long %>%
  mutate(subject2=ifelse(subject=='elective' & is.na(subject2),'Other Elective',subject2)) %>%
  mutate(subject2=ifelse(subject=='mandatory' & is.na(subject2),'Other Mandatory',subject2)) %>%
  mutate(subject2=ifelse(subject=='mandatory',mandatory_subject_student,subject2))

data_student_teacher_expenditure_long<-data_student_teacher_expenditure_long %>% 
  mutate(quart_teacher=cut(teacher_test, breaks = c(-Inf, 0.25,0.5,0.75, Inf), 
                           labels = c('1','2','3','4'), right = FALSE))

#get school/track mean over time using harmoized school/track codes
data_student_teacher_expenditure_long<-data_student_teacher_expenditure_long %>%
  group_by(school_harmonized) %>%
  mutate(school_mean_math=mean(entrance_perc_math,na.rm=T),
         school_mean_ro=mean(entrance_perc_ro,na.rm=T)) %>%
  group_by(school_harmonized,an) %>%
  mutate(school_mean_math_yr=mean(entrance_perc_math,na.rm=T),
         school_mean_ro_yr=mean(entrance_perc_ro,na.rm=T))


data_reg_filtered<-data_student_teacher_expenditure_long  %>% na.omit %>% filter(school_change==F)

data_reg_filtered<-data_reg_filtered %>% 
  na.omit %>% 
  filter(school_change==F) %>%
  mutate(student=student*100,
         teacher_test=teacher_test*100,
         entrance_perc=entrance_perc*100,
         entrance_perc_ro=entrance_perc_ro*100,
         entrance_perc_math=entrance_perc_math*100)
data_reg_filtered<-data_reg_filtered %>%
  mutate(class_mean=class_mean*100,
         class_mean_yr=class_mean_yr*100,
         school_mean_yr=school_mean_yr*100,
         school_mean=school_mean*100)


data_reg_filtered<-data_reg_filtered %>%
  group_by(an,specializare_adm,specializare_lb) %>%
  mutate(n=n())

gc()



### School: Baseline
# First, run a baseline regression:
baseline_school<-feols(student~entrance_perc_ro+entrance_perc_math+dec_town+n_hs_town_group+n_students_town_yr*dec_town|
                         as.factor(an)+as.factor(Cod_SIRUTA_hs_bac)+scoala_de_provenienta+judet_bac+Unemployment_hs_bac+Wages_hs_bac+drop_hs_hs_bac|
                         school_mean~dec_town*n_hs_town_group,
                       cluster=~as.factor(Cod_SIRUTA_hs_bac),
                       data=data_reg_filtered)
summary(baseline_school)


### School: One factor at a time
#Add teacher:
baseline_school_teacher<-feols(student~entrance_perc_ro+entrance_perc_math+dec_town+n_hs_town_group+n_students_town_yr*dec_town+teacher_test|
                                 as.factor(an)+as.factor(Cod_SIRUTA_hs_bac)+scoala_de_provenienta+judet_bac+Unemployment_hs_bac+Wages_hs_bac+drop_hs_hs_bac|
                                 school_mean~dec_town*n_hs_town_group,
                               cluster=~as.factor(Cod_SIRUTA_hs_bac),
                               data=data_reg_filtered)
summary(baseline_school_teacher)


#Add peer:
baseline_school_peer<-feols(student~entrance_perc_ro+entrance_perc_math+dec_town+n_hs_town_group+n_students_town_yr*dec_town+I(school_mean_yr-school_mean)|
                              as.factor(an)+as.factor(Cod_SIRUTA_hs_bac)+scoala_de_provenienta+judet_bac+Unemployment_hs_bac+Wages_hs_bac+drop_hs_hs_bac|
                              school_mean~dec_town*n_hs_town_group,
                            cluster=~as.factor(Cod_SIRUTA_hs_bac),
                            data=data_reg_filtered)
summary(baseline_school_peer)


#Add expenditure:
baseline_school_expenditure<-feols(student~entrance_perc_ro+entrance_perc_math+dec_town+n_hs_town_group+n_students_town_yr*dec_town+I(Expenditure/n_students_hs_yr/1000)|  as.factor(an)+as.factor(Cod_SIRUTA_hs_bac)+scoala_de_provenienta+judet_bac+Unemployment_hs_bac+Wages_hs_bac+drop_hs_hs_bac|
                                     school_mean~dec_town*n_hs_town_group,
                                   cluster=~as.factor(Cod_SIRUTA_hs_bac),
                                   data=data_reg_filtered)
summary(baseline_school_expenditure)



### School: Two and Three Factors
#Peers plus teachers:
baseline_school_teacher_peer<-feols(student~entrance_perc_ro+entrance_perc_math+dec_town+n_hs_town_group+n_students_town_yr*dec_town+teacher_test+I(school_mean_yr-school_mean)|  as.factor(an)+as.factor(Cod_SIRUTA_hs_bac)+scoala_de_provenienta+judet_bac+Unemployment_hs_bac+Wages_hs_bac+drop_hs_hs_bac|
                                      school_mean~dec_town*n_hs_town_group,
                                    cluster=~as.factor(Cod_SIRUTA_hs_bac),
                                    data=data_reg_filtered)
summary(baseline_school_teacher_peer)


#Peer plus teachers plus expenditures:
baseline_school_all<-feols(student~entrance_perc_ro+entrance_perc_math+dec_town+n_hs_town_group+n_students_town_yr*dec_town+teacher_test+I(school_mean_yr-school_mean)+I(Expenditure/n_students_hs_yr/1000)|  as.factor(an)+as.factor(Cod_SIRUTA_hs_bac)+scoala_de_provenienta+judet_bac+Unemployment_hs_bac+Wages_hs_bac+drop_hs_hs_bac|
                             school_mean~dec_town*n_hs_town_group,
                           cluster=~as.factor(Cod_SIRUTA_hs_bac),
                           data=data_reg_filtered)
summary(baseline_school_all)



### Track: Baseline
#First, run a baseline regression:
baseline_track<-feols(student~entrance_perc_ro+entrance_perc_math+dec_town+n_hs_town_group+n_students_town_yr*dec_town|
                        as.factor(an)+as.factor(Cod_SIRUTA_hs_bac)+scoala_de_provenienta+judet_bac+Unemployment_hs_bac+Wages_hs_bac+drop_hs_hs_bac|
                        class_mean~dec_town*n_hs_town_group,
                      cluster=~as.factor(Cod_SIRUTA_hs_bac),
                      data=data_reg_filtered)
summary(baseline_track)


### Track: One factor at a time
#Add teacher:
baseline_track_teacher<-feols(student~entrance_perc_ro+entrance_perc_math+dec_town+n_hs_town_group+n_students_town_yr*dec_town+teacher_test|
                                as.factor(an)+as.factor(Cod_SIRUTA_hs_bac)+scoala_de_provenienta+judet_bac+Unemployment_hs_bac+Wages_hs_bac+drop_hs_hs_bac|
                                class_mean~dec_town*n_hs_town_group,
                              cluster=~as.factor(Cod_SIRUTA_hs_bac),
                              data=data_reg_filtered)
summary(baseline_track_teacher)


#Add peer:
baseline_track_peer<-feols(student~entrance_perc_ro+entrance_perc_math+dec_town+n_hs_town_group+n_students_town_yr*dec_town+I(class_mean_yr-class_mean)|
                             as.factor(an)+as.factor(Cod_SIRUTA_hs_bac)+scoala_de_provenienta+judet_bac+Unemployment_hs_bac+Wages_hs_bac+drop_hs_hs_bac|
                             class_mean~dec_town*n_hs_town_group,
                           cluster=~as.factor(Cod_SIRUTA_hs_bac),
                           data=data_reg_filtered)
summary(baseline_track_peer)


#Add expenditure:
baseline_track_expenditure<-feols(student~entrance_perc_ro+entrance_perc_math+dec_town+n_hs_town_group+n_students_town_yr*dec_town+I(Expenditure/n_students_hs_yr/1000)|  as.factor(an)+as.factor(Cod_SIRUTA_hs_bac)+scoala_de_provenienta+judet_bac+Unemployment_hs_bac+Wages_hs_bac+drop_hs_hs_bac|
                                    class_mean~dec_town*n_hs_town_group,
                                  cluster=~as.factor(Cod_SIRUTA_hs_bac),
                                  data=data_reg_filtered)
summary(baseline_track_expenditure)



### Track: Two and Three Factors
#Peers plus teachers:
baseline_track_teacher_peer<-feols(student~entrance_perc_ro+entrance_perc_math+dec_town+n_hs_town_group+n_students_town_yr*dec_town+teacher_test+I(class_mean_yr-class_mean)|  as.factor(an)+as.factor(Cod_SIRUTA_hs_bac)+scoala_de_provenienta+judet_bac+Unemployment_hs_bac+Wages_hs_bac+drop_hs_hs_bac|
                                     class_mean~dec_town*n_hs_town_group,
                                   cluster=~as.factor(Cod_SIRUTA_hs_bac),
                                   data=data_reg_filtered)
summary(baseline_track_teacher_peer)


#Peer plus teachers plus expenditures:
baseline_track_all<-feols(student~entrance_perc_ro+entrance_perc_math+dec_town+n_hs_town_group+n_students_town_yr*dec_town+teacher_test+I(class_mean_yr-class_mean)+I(Expenditure/n_students_hs_yr/1000)|  as.factor(an)+as.factor(Cod_SIRUTA_hs_bac)+scoala_de_provenienta+judet_bac+Unemployment_hs_bac+Wages_hs_bac+drop_hs_hs_bac|
                            class_mean~dec_town*n_hs_town_group,
                          cluster=~as.factor(Cod_SIRUTA_hs_bac),
                          data=data_reg_filtered)
summary(baseline_track_all)



### Latex
variables<-c('Timing'='Year',entrance_perc='Admission Score')
f_big<-function(x) format(x, big.mark=",", scientific=FALSE, nsmall=1,digits=1)
f <- function(x) format(round(x/1000000,1) , nsmall = 1)


current_path<-rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))
fileConn<-file("10_table_08.txt")
writeLines(
  print(modelsummary(list("Baseline"=baseline_school,
                          "Teacher"=baseline_school_teacher,
                          "Peer"=baseline_school_peer,
                          "Expenditure"=baseline_school_expenditure,
                          "T+P"=baseline_school_teacher_peer,
                          "T+P+E"=baseline_school_all),
                     statistic = "std.error",
                     estimate="{estimate}{stars}",
                     stars=c('^{*}'=0.1,'^{**}'=0.05,'^{***}'=0.01),
                     gof_map=list(list("raw" = "nobs", "clean" = "N", "fmt" = f_big),
                                  list("raw" = "r.squared", "clean" = "R$^2$",fmt="%.2f")),
                     #gof_map=c("nobs", "r2"),
                     metrics="R2",
                     output="latex",
                     escape=F)
  ), fileConn)
close(fileConn)







current_path<-rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))
fileConn<-file("10_table_08_track_appendix.txt")
writeLines(
  print(modelsummary(list("Baseline"=baseline_track,
                          "Teacher"=baseline_track_teacher,
                          "Peer"=baseline_track_peer,
                          "Spending"=baseline_track_expenditure,
                          "T+P"=baseline_track_teacher_peer,
                          "T+P+S"=baseline_track_all),
                     statistic = "std.error",
                     estimate="{estimate}{stars}",
                     stars=c('$^{*}$'=0.1,'$^{**}$'=0.05,'$^{***}$'=0.01),
                     gof_map=list(list("raw" = "nobs", "clean" = "N", "fmt" = f_big),
                                  list("raw" = "r.squared", "clean" = "R$^2$",fmt="%.2f")),
                     metrics="R2",
                     output="latex",
                     escape=F)
  ), fileConn)
close(fileConn)